************************************************************************
/* This Do File will create Tables and Figures from 
Agan, Doleac, and Harvey "Misdemeanor Prosecution"

You will need

 *0.  the data created by "1_createstimation.dta", in the path folder set below
 *1.  "ivregcomplier2.do" modified from Bhuller et al. (2020, JPE) (provided)
 *2.  arci_poly.do, created by Amanda Agan to calculate Anderson-Rubin CIs (provided)
 *3.  mtefe package downloadable from ssc

 for appendix tables:
 *4.  manyiv command from Hull, Goldsmith-Pinkham, Kolesar (2022) 
      "Contamination Bias in Linear IV regression" (contact authors directly)
 *5.  ivlasso package downloadable from ssc
 *6.  testjfe package downloadable from ssc

*/
************************************************************************
clear 

* Root folder that holds the .dta files written by 1_createstimationsample
global path "/Users/aagan/Dropbox/Prosecutorial Reform Initiative/Suffolk/Do/QJE Replication Archive/"
cd "$path"

************************************************************************
/* ANALYSIS SETTINGS: horizon, fixed effects, clusters, covariates */
************************************************************************

* Follow-up horizon: picks the estimation file opened below and the
* anyr_/numr_ outcome names used in the main tables
global length "twoyears"

* Fixed effects passed to absorb() in the reghdfe / ivreghdfe calls
global FE "court_month2 court_dow2"

* Clustering variables passed to cluster() / vce(cluster ...)
global clusters "id_prsn_dfndnt first_pros"

* Baseline covariate set
global covars " number_counts_correct number_misd_correct number_misd_high anyconv_misd_oneyearpriorc  anyconv_felony_oneyearpriorc citizen  type_pettybs_only type_mv_only type_drug_only male  age2 age3 age4  predwhi predbla predhis"

************************************
/* Open Data */
************************************ 

use "${path}/suffolk_est_${length}_demos.dta", clear

************************************************************************
************************************************************************
************************************************************************
						/* MAIN TABLES */
************************************************************************
************************************************************************
************************************************************************

************************************************************************
* TABLE I: SUMMARY STATISTICS
************************************************************************ 

global sum_vars "ng_immed_all number_counts_correct number_misd_correct number_misd_high anyconv_misd_oneyearpriorc  anyconv_felony_oneyearpriorc citizen  type_pettybs_only type_mv_only type_drug_only  type_uncat_only  victimless male age1 age2 age3 age4  predwhi predbla predhis bail_requested_arr bail_set  days_to_last_event num_events  dcjis_match conviction"

* Variables summarised in every column: the descriptives above plus the
* any/number re-arrest outcomes for the chosen horizon
local tab1_vars $sum_vars anyr_${length}_arrest2 numr_${length}_arrest2

* Full sample
sum `tab1_vars'

* Split by ng_immed_all: ==0 first, then ==1
forvalues np = 0/1 {
	sum `tab1_vars' if ng_immed_all==`np'
}



************************************************************************
* TABLE II: FIRST STAGE
************************************************************************ 
* ivreghdfe (option first) is used so the first stage is exactly the one
* behind the IV estimates below. The dependent variable is a placeholder:
* only the first-stage output is read from these runs.

* withx==0: no covariates; withx==1: add $covars
forvalues withx = 0/1 {
	local controls
	if `withx' local controls $covars
	ivreghdfe days_to_last (ng_immed_all = ADA_iv) `controls', cluster($clusters) absorb($FE) first
}


*******************************************************************************
* TABLE III: OLS/SECOND STAGE/RF
* NOTE(review): this banner previously read "TABLE II", duplicating the
* first-stage banner; the following section is TABLE IV -- confirm numbering.
*******************************************************************************

global outcomes anyr_${length}_arrest2  numr_${length}_arrest2 

foreach y of varlist $outcomes {

	* Cols 1-2, OLS: without covariates, then with covariates
	forvalues withx = 0/1 {
		local controls
		if `withx' local controls $covars
		reghdfe `y' ng_immed_all `controls', vce(cluster $clusters) absorb($FE)
	}

	* Cols 3-4, IV: without covariates, then with covariates.
	* arci_poly.do computes the Anderson-Rubin confidence interval; it needs
	* the first stage and reduced form saved by ivreghdfe under the
	* prefixes f_ and r_, so it must run right after each IV fit.
	forvalues withx = 0/1 {
		local controls
		if `withx' local controls $covars
		ivreghdfe `y' (ng_immed_all = ADA_iv) `controls', cluster($clusters) absorb($FE) savefirst savefprefix(f_) saverf saverfprefix(r_)
		qui do arci_poly.do
		matrix list ar_l
		matrix list ar_u
	}

	* Col 5, reduced form
	reghdfe `y' ADA_iv $covars, vce(cluster $clusters) absorb($FE)

	** Mean of the dependent variable when ng_immed_all==0, and the
	** complier mean from ivregcomplier2 (based on Bhuller et al.)
	sum `y' if ng_immed_all==0
	ivregcomplier2, y(`y') d(ng_immed_all) x($covars) z(ADA_iv) fe($FE) cluster($clusters) ztrim(1) name(__`y')
	sum __`y' if ______=="E[Y | D = 0, Complier]"

	* remove the variables ivregcomplier2 left behind before the next outcome
	drop ______ __`y'

}

*******************************************************************************
* TABLE IV: Second Stage by defendant criminal history
*******************************************************************************

* Subsample definitions, one per column
local hp1 "def_sequence2==1"                        /* Col 1: no previous complaint */
local hp2 "def_sequence2>1"                         /* Col 2: has a previous complaint */
local hp3 "hasprev_dcjis==0"                        /* Col 3: no previous dcjis */
local hp4 "hasprev_dcjis==1 & hasprev_conv==0"      /* Col 4: previous dcjis, no conviction */
local hp5 "hasprev_dcjis==1 & hasprev_conv==1"      /* Col 5: previous dcjis, prev conviction */

* 0/1 indicator for each subsample
forvalues g = 1/5 {
	gen hp`g' = (`hp`g'')
}

local y anyr_${length}_arrest2

forvalues g = 1/5 {
	* IV with covariates inside subsample g; first stage / reduced form are
	* saved under f_ / r_ because arci_poly.do reads them
	ivreghdfe `y' (ng_immed_all = ADA_iv) $covars if hp`g'==1, cluster($clusters) absorb($FE) savefirst savefprefix(f_) saverf saverfprefix(r_)
	qui do arci_poly.do
	matrix list ar_l
	matrix list ar_u

	* outcome mean when ng_immed_all==0, then the complier mean
	sum `y' if hp`g'==1 & ng_immed_all==0
	qui ivregcomplier2 if hp`g'==1, y(`y') d(ng_immed_all) x($covars) z(ADA_iv) fe($FE) cluster($clusters) ztrim(1) name(__test)
	sum __test if ______=="E[Y | D = 0, Complier]"
	drop ______ __test
}

*******************************************************************************
* TABLE V: Second Stage by subsequent complaint type
*******************************************************************************

* create some vars

* Lagged values from the defendant's previous row (ordered by def_sequence2).
* Under by:, [_n-1] on a defendant's first row is missing / "".
sort id_prsn_dfndnt def_sequence2
by id_prsn_dfndnt: gen prev_charge=ng_immed_all[_n-1]==0
by id_prsn_dfndnt: gen prev_ADA_iv=ADA_iv[_n-1]
by id_prsn_dfndnt: gen prev_first_pros=first_pros[_n-1]
by id_prsn_dfndnt: gen prev_court_month2 = court_month2[_n-1]
by id_prsn_dfndnt: gen prev_court_dow2 =court_dow2[_n-1]
by id_prsn_dfndnt: gen same_agency=(agency_code==agency_code[_n-1]) 
by id_prsn_dfndnt: gen diff_agency=(agency_code!=agency_code[_n-1])

* Blank the comparison when either agency code is missing.
* FIX: these replaces previously ran without the by: prefix, so [_n-1] on a
* defendant's first row pointed at the PREVIOUS defendant's last row and the
* first row was not set to missing. The data are still sorted by
* id_prsn_dfndnt def_sequence2 here, so by: can be used directly.
by id_prsn_dfndnt: replace same_agency=. if ( mi(agency_code) | mi(agency_code[_n-1]) )
by id_prsn_dfndnt: replace diff_agency=. if ( mi(agency_code) | mi(agency_code[_n-1]) )

* Count of rows for the same defendant and same agency_code whose
* first_event_date falls 1 to 730 days after the current row's date.
* Note: requires -ssc install rangestat-
sort id_prsn_dfndnt agency_code first_event_date
rangestat (count) numr_twoyears_sameagency=id_prsn_dfndnt, int(first_event_date 1 730) by(id_prsn_dfndnt agency_code)
replace numr_twoyears_sameagency=0 if numr_twoyears_sameagency==.  

* Different-agency count = all two-year re-arrests minus the same-agency count
gen numr_twoyears_diffagency=numr_twoyears_arrest2-numr_twoyears_sameagency

gen anyr_twoyears_sameagency=numr_twoyears_sameagency>0
gen anyr_twoyears_diffagency=numr_twoyears_diffagency>0

* "disc": some two-year re-arrest, but anyr_twoyears_nodiscretion is 0
gen anyr_twoyears_disc=(anyr_twoyears_nodiscretion==0) & (anyr_twoyears_arrest2==1)
gen anyr_twoyears_nodisc=anyr_twoyears_nodiscretion

* No agency on the current case: same/different agency is undefined.
* (anyr_twoyears_same / anyr_twoyears_diff are variable-name abbreviations,
* presumably of the *_sameagency / *_diffagency variables generated above.)
replace anyr_twoyears_same=. if agency_code==""
replace anyr_twoyears_diff=. if agency_code==""

* regressions

global outcomes  anyr_twoyears_nodisc anyr_twoyears_disc  anyr_twoyears_victim anyr_twoyears_novictim  anyr_twoyears_same anyr_twoyears_diff

foreach y of varlist $outcomes {

	* first pass: first-time defendants; second pass: repeat defendants
	foreach grp in "def_sequence2==1" "def_sequence2>1" {

		ivreghdfe `y' (ng_immed_all = ADA_iv) $covars if `grp', cluster($clusters) absorb($FE) savefirst savefprefix(f_) saverf saverfprefix(r_)

		* Anderson-Rubin confidence interval; arci_poly.do needs the f_ / r_
		* results saved by the ivreghdfe call directly above
		qui do arci_poly.do
		matrix list ar_l
		matrix list ar_u

		* complier mean of the outcome in the same subsample
		ivregcomplier2 if `grp', y(`y') d(ng_immed_all) x($covars) z(ADA_iv) fe($FE) cluster($clusters) ztrim(1) name(__`y')
		sum __`y' if ______=="E[Y | D = 0, Complier]"
		drop ______ __`y'
	}

}

*******************************************************************************
* TABLE VI: ADA IMPUTATIONS
*******************************************************************************

use "$path/suffolk_est_twoyears_imputation.dta", clear

*** PANEL A: OLS amongst missing and non-missing ***

* Sample restrictions WITHOUT the !mi(first_pros) requirement, so that cases
* with a missing first_pros can be kept
global sample2 "first_event_year>2003 & first_event_date<21428  & muni_district_court==1 & felony_correct_any==0 & type_violent==0 & type_weapons==0 & any_missing==0  & singleton!=1 " 

* (1) Cases with a non-missing first_pros
reghdfe anyr_twoyears_arrest2 ng_immed_all $covars if $sample2 & !mi(first_pros) & n_pros>=30, vce(cluster id_prsn_dfndnt) absorb($FE)

* Control mean on the same restriction as regression (1).
* FIX: this used "& mifp==1" here and "& mifp==0" below. mifp is not created
* in this file; if it follows the mifp`i'=mi(first_pros_imp`i') pattern used
* later, the two means were swapped relative to their regressions. The
* regression's own condition is now repeated so the mean cannot drift from
* the estimation restriction.
sum anyr_twoyears_arrest2 if ng_immed_all==0 & $sample2 & !mi(first_pros) & n_pros>=30

* (2) Cases with a missing first_pros
reghdfe anyr_twoyears_arrest2 ng_immed_all $covars if $sample2 & mi(first_pros), vce(cluster id_prsn_dfndnt) absorb($FE)

sum anyr_twoyears_arrest2 if ng_immed_all==0 & $sample2 & mi(first_pros)

* (3) Pooled. Flag singletons among the missing-first_pros cases so they can
* be excluded from the pooled regression and its control mean.
* NOTE(review): global timeFE is not defined anywhere in this do-file. Unless
* it is still set from an earlier script in the same session it expands to
* nothing and the grouping below is by insample2 only -- confirm the intended
* grouping variables.
 cap drop insample2 singleton2
 gen insample2=($sample2 & mi(first_pros))
 bys $timeFE insample2: gen singleton2=(_N==1)
 tab singleton2 if $sample2

reghdfe anyr_twoyears_arrest2 ng_immed_all $covars if $sample2 & (n_pros>=30 | mi(n_pros)) & singleton2!=1, vce(cluster id_prsn_dfndnt) absorb($FE)

sum anyr_twoyears_arrest2 if ng_immed_all==0 & $sample2 & (n_pros>=30 | mi(n_pros)) & singleton2!=1


*** PANEL B: Within low missing-ness years and courts ***

* The commands below condition on the global "sample", which this do-file
* never defines. If it is not already set in the session, "if $sample & ..."
* expands to "if  & ..." and Stata stops with a syntax error.
* NOTE(review): the fallback is a guess modelled on $sample_imp in Panel C
* with the non-imputed first_pros / n_pros -- confirm against the
* sample-creation script.
if `"$sample"' == "" {
	di as txt "note: global sample was not set; using the fallback definition in Table VI Panel B"
	global sample "first_event_year>2003 & first_event_date<21428  & muni_district_court==1 & felony_correct_any==0 & first_pros~=. & type_violent==0 & type_weapons==0  & (n_pros>=30 & !mi(n_pros)) & any_missing==0  & singleton!=1"
}

* Indicator for a missing first_pros, defined only inside the base restrictions
gen miss_ADA= first_pros==.

tab miss_ADA if first_event_year>2003 & first_event_date<21428  & muni_district_court==1 & felony_correct_any==0 & type_violent==0 & type_weapons==0 

replace miss_ADA=. if !(first_event_year>2003 & first_event_date<21428  & muni_district_court==1 & felony_correct_any==0 & type_violent==0 & type_weapons==0)

* Share missing by court and by year
bys code_crt_lctn: egen miss_ADA_court=mean(miss_ADA)
by code_crt_lctn: sum miss_ADA_court

bys first_event_year: egen miss_ADA_year=mean(miss_ADA)
by first_event_year: sum miss_ADA_year if first_event_year>2003

* SBO: 39%; WROX: 48%; EBOS 42%
gen lowcourt=miss_ADA_court<.50 
gen lowyear=miss_ADA_year<.60

* Same sequence for the low-missingness courts, then the low-missingness years
foreach low in lowcourt lowyear {

	ivreghdfe anyr_twoyears_arrest2 (ng_immed_all = ADA_iv) $covars if $sample & `low'==1, cluster($clusters) absorb($FE) savefirst savefprefix(f_) saverf saverfprefix(r_)

	* Anderson-Rubin CI from the f_ / r_ results saved just above
	qui do arci_poly.do
	matrix list ar_l
	matrix list ar_u

	sum anyr_twoyears_arrest2 if ng_immed_all==0 & $sample & `low'==1

	* Complier mean.
	* FIX: (a) the output name was __`i' with `i' undefined outside a loop,
	* i.e. a variable literally named "__"; an explicit name is used instead.
	* (b) the lowcourt call passed z(ADA_iv_imp) although the IV regression
	* above and the lowyear call use ADA_iv; both now use ADA_iv.
	qui ivregcomplier2 if $sample & `low'==1, y(anyr_twoyears_arrest2) d(ng_immed_all) x($covars) z(ADA_iv) fe($FE) cluster($clusters) ztrim(1) name(__test)
	sum __test if ______=="E[Y | D = 0, Complier]"
	drop ______ __test

	** joint test that the covariates predict the instrument
	* NOTE(review): this regression uses ADA_iv_imp while the IV above uses
	* ADA_iv; left unchanged -- confirm which is intended.
	qui reghdfe ADA_iv_imp $covars if $sample & `low'==1, vce(cluster $clusters) absorb($FE)
	testparm $covars
}
	


*** PANEL C: Within Various Imputation Samples ***
** Imputation 0 is simply the observed first_pros
gen first_pros_imp0=first_pros

forvalues i=0(1)4{

	** Clusters: defendant and the imputed prosecutor id for this imputation
	global clusters_imp="id_prsn_dfndnt first_pros_imp`i'"

	** Sample: cases whose imputed first_pros is non-missing
	global sample_imp "first_event_year>2003 & first_event_date<21428  & muni_district_court==1 & felony_correct_any==0 & first_pros_imp`i'~=. & type_violent==0 & type_weapons==0  & (n_pros_imp>=30 & !mi(n_pros_imp)) & any_missing==0  & singleton!=1" 

	* MAIN REGRESSION (using old ADA_iv means imputed to new people)
	ivreghdfe anyr_twoyears_arrest2 (ng_immed_all = ADA_iv_imp) $covars if  $sample_imp,    cluster($clusters_imp ) absorb($FE)  savefirst savefprefix(f_) saverf saverfprefix(r_)

	* Anderson-Rubin CI from the f_ / r_ results saved just above
	qui do arci_poly.do
	matrix list ar_l
	matrix list ar_u

	* Control mean in THIS imputation sample.
	* FIX: this conditioned on $sample, which is not defined in this file and
	* is not the sample the regression above was estimated on.
	sum anyr_twoyears_arrest2 if ng_immed_all==0 & $sample_imp

	* Complier mean
	ivregcomplier2 if $sample_imp, y(anyr_twoyears_arrest2) d(ng_immed_all) x($covars ) z(ADA_iv_imp) fe($FE) cluster($clusters_imp) ztrim(1) name(__`i')
	sum __`i' if ______=="E[Y | D = 0, Complier]"
	drop ______ __`i' 

	** joint test that the covariates predict the instrument
	reghdfe ADA_iv_imp $covars  if $sample_imp, vce(cluster $clusters_imp) absorb($FE)
	testparm $covars

}


** Proportion with a missing (imputed) ADA, separately by ng_immed_all
forvalues i=0(1)4{
	gen mifp`i'=mi(first_pros_imp`i')
	gen mifp`i'_np=mi(first_pros_imp`i') if ng_immed_all==1
	gen mifp`i'_p=mi(first_pros_imp`i') if ng_immed_all==0

	sum mifp`i'_p
	sum mifp`i'_np 

}

*******************************************************************************
* TABLE VII: ROLLINS INAUG RESULTS
*******************************************************************************

use "$path/suffolk_est_oneyears_demos_rollins.dta", clear

* Controls shared by every specification: court, month and day-of-week
* indicators plus the baseline covariates
local rhs i.court_location_case  i.first_event_month i.first_event_dow $covars

*OLS 
ivreghdfe anyr_oneyear_arrest2 ng_immed_all `rhs' if  felony_correct_any==0, robust cluster(id_prsn_dfndnt) first

sum anyr_oneyear_arrest2 if ng_immed_all==0  & felony_correct_any==0

*IV DD no control group: instrument is post_rachael
ivreghdfe anyr_oneyear_arrest2 (ng_immed_all=post_rachael) `rhs' if felony_correct_any==0, robust cluster(id_prsn_dfndnt) first 

*IV DD control group: instrument is post_rachael_nv, with nv_misd and
*post_rachael entering as controls
ivreghdfe anyr_oneyear_arrest2 (ng_immed_all=post_rachael_nv) nv_misd post_rachael `rhs', robust cluster(id_prsn_dfndnt) first 

*reduced form no control group
reg anyr_oneyear_arrest2 post_rachael `rhs' if  felony_correct_any==0, robust cluster(id_prsn_dfndnt) 

*reduced form control group
* FIX: dropped the trailing "first" option. It belongs to the IV commands
* above; -regress- does not accept it and stops with "option first not allowed".
reg anyr_oneyear_arrest2 post_rachael_nv nv_misd post_rachael `rhs', robust cluster(id_prsn_dfndnt)

************************************************************************
************************************************************************
************************************************************************
					/* MAIN FIGURES */
************************************************************************
************************************************************************
************************************************************************

use "$path/suffolk_est_${length}_demos.dta", clear

*******************************************************************************
* Figure I: Distribution of leniency measure + non-parametric first stage
*******************************************************************************

* Regress ng_immed_all on ADA_iv with court_dow2 indicators and court_month2
* absorbed, then add the fitted ADA_iv part and the constant back to the
* residual
areg ng_immed_all ADA_iv i.court_dow2, absorb(court_month2)
predict resid_ADA_iv, resid
gen resid_ADA = resid_ADA_iv + _b[ADA_iv]*ADA_iv + _b[_cons]

* Local linear fit of that quantity on ADA_iv, evaluated at 100 grid points
lpoly resid_ADA ADA_iv, nograph degree(1) bw(0.04) gen(fs_x fs_y) n(100) se(se)

* Pointwise band: fit +/- 1.96 standard errors
gen upper = fs_y + 1.96*se
gen lower = fs_y - 1.96*se

* Lines are drawn only between the 1st and 99th percentile of ADA_iv
sum ADA_iv, det
local bottom=r(p1)
local top=r(p99)
local inside "fs_x>=`bottom' & fs_x<=`top'"

* Shared look of the two band lines
local band "lc(gs8) lw(.3) yaxis(2) lp(dash)"

* Create the figure: histogram on axis 1, fit and band on axis 2
twoway (hist ADA_iv if abs(ADA_iv)<=.15, width(.005) frac fcolor(gs10) lcolor(white) yaxis(1)) ///
	(line fs_y fs_x if `inside', lc(black) lw(.6) yaxis(2)) ///
	(line upper fs_x if `inside', `band') ///
	(line lower fs_x if `inside', `band') ///
	, title("", size(large) color(black))  ///
	ytitle("Fraction of Sample", size(medlarge) axis(1))  ///
	ytitle("Residualized Rate of Nonprosecution", size(medlarge) axis(2)) ///
	xtitle("ADA Leniency", size(medium)) ///
	legend(off) ///
	ylabel(0(.025).10 , nogrid axis(1)) ///
	ylabel(0.2(.05)0.35, nogrid axis(2)) ///
	xlabel(-0.15(.05)0.15 , nogrid) ///
	graphregion(color(white)) bgcolor(white) 
	
*******************************************************************************
* Figure II: LATE for Criminal Complaint within X Months
*******************************************************************************
* Month-named copies of the one- and two-year outcomes so that a single loop
* can address every horizon as anyr_<m>months_arrest2
gen anyr_12months_arrest2=anyr_oneyear_arrest2
gen anyr_24months_arrest2=anyr_twoyears_arrest2

* figure settings (set first because the plot list built below expands $color)
global off=0.3
global foff=0.6
global color "black"
global mcolor "gs9"
global fcolor "gs12"
global mpattern "longdash"
global fpattern "shortdash"

* for the 2 year sample: one IV estimate per 3-month horizon, stored as
* iv3 ... iv24, while the matching coefplot model spec is appended to `plots'
local plots
forvalues m = 3(3)24 {
	qui eststo iv`m': ivreghdfe anyr_`m'months_arrest2 (ng_immed_all = ADA_iv) $covars, cluster($clusters) absorb($FE)

	* only the first model carries a label; the rest are kept out of the key
	local key nokey
	if `m'==3 local key label(All) pstyle(p1)
	local plots `plots' (iv`m', mcolor($color) ciopts(lcolor($color)) `key')
}

coefplot `plots' ///
 , keep(ng_immed_all) vertical scheme(s1mono) mstyle(p1) yline(0)  aseq ///
 ytitle(IV Estimates of Effect of Nonprosecution) xtitle("Criminal Complaint Within X Months") ///
 eqrename(iv3=3 iv6=6 iv9=9  iv12=12 iv15=15 iv18=18 iv21=21 iv24=24 ivm3=3 ivm6=6 ivm9=9  ivm12=12 ivm15=15 ivm18=18 ivm21=21 ivm24=24 ivf3=3 ivf6=6 ivf9=9  ivf12=12 ivf15=15 ivf18=18 ivf21=21 ivf24=24  ) ///
 at(_eq) xlabel(3 6 9 12 15 18 21 24) yla(-.5(.1).1) legend(off)
 
*******************************************************************************
* Figure III: LATE with different time horizons
*******************************************************************************

local horizons one two three four five six

* Series 1: each horizon estimated on its own estimation file
foreach h of local horizons {

	qui use "$path/suffolk_est_`h'years_demos.dta", clear
	* the one-year outcome is stored as "oneyear"; rename it so every horizon
	* follows the anyr_<h>years_arrest2 pattern
	qui rename anyr_oneyear_arrest2 anyr_oneyears_arrest2

	qui eststo iv`h': ivreghdfe anyr_`h'years_arrest2 (ng_immed_all = ADA_iv) $covars, cluster($clusters) absorb($FE)

}

* at this point the 6 year sample is open

* figure settings (set first because the plot lists built below expand them)
global off=0.1
global color "black"
global mcolor "gs8"
global mpattern "longdash"

* Series 2: every horizon re-estimated on the six-year file still in memory.
* The coefplot model specs for both series are collected while looping.
local full
local six
foreach h of local horizons {
	qui eststo iv`h'_6: ivreghdfe anyr_`h'years_arrest2 (ng_immed_all = ADA_iv) $covars, cluster($clusters) absorb($FE)

	* only the first model of each series carries the legend label
	local kfull nokey
	local ksix nokey
	if "`h'"=="one" {
		local kfull label(Full Sample Available) pstyle(p1)
		local ksix label(In 6 Year Sample Only (<2015)) pstyle(p1)
	}
	local full `full' (iv`h', mcolor($color) ciopts(lcolor($color)) `kfull')
	local six `six' (iv`h'_6, offset($off) mcolor($mcolor) ciopts(lcolor($mcolor) lpattern($mpattern)) `ksix')
}

coefplot `full' ///
 `six' ///
 , keep(ng_immed_all) vertical scheme(s1mono) mstyle(p1) yline(0)  aseq ///
 ytitle(IV Estimates of Effect of Nonprosecution) xtitle("Criminal Complaint Within X Years") ///
 eqrename(ivone=1 ivtwo=2 ivthree=3  ivfour=4 ivfive=5 ivsix=6  ivone_6=1 ivtwo_6=2 ivthree_6=3  ivfour_6=4 ivfive_6=5 ivsix_6=6    ) ///
 at(_eq) xlabel(1 2 3 4 5 6) yla(-.5(.1).1) legend(symxsize(*1.5))


*******************************************************************************
* Figure IV: MTE  
*******************************************************************************
use "${path}/suffolk_est_${length}_demos.dta", clear

* number of bootstrap replications handed to mtefe
global bootreps 100

* Main figure: 3rd order polynomial (see Appendix for other versions),
* logit link, support trimmed at 0.01, clustered on defendant, with
* court_month2 and court_dow2 absorbed
mtefe anyr_twoyears_arrest2 $covars (ng_immed_all = ADA_iv), ///
	link(logit) poly(3) trimsupport(0.01) bootreps($bootreps) ///
	vce(cluster id_prsn_dfndnt) absorb(i.court_month2 i.court_dow2)



************************************************************************
************************************************************************
************************************************************************
					/* Appendix Tables */
************************************************************************
************************************************************************
************************************************************************

use "$path/suffolk_est_${length}_demos.dta", clear

*******************************************************************************
* Table A.1 Different IV estimation Strategies
*******************************************************************************

* Group ids: prosecutor x court-month, and prosecutor x court-month x day-of-week
egen fp_court=group(first_pros court_month2)

egen fp_court_dow=group(first_pros court_month2 court_dow2)

* Indicator sets for the two fixed effects
qui tab court_month2 , gen(cm)
qui tab court_dow2 , gen(cd)

* for the full ADA dummies
qui tab first_pros , gen(fp)

* FIX: the instrument lists below were written as the wildcard fp*, which
* also matches the group ids fp_court and fp_court_dow created above, so
* those ids entered as extra instruments. Build the list of fp-prefixed
* variables explicitly and remove the two ids.
unab fp_all : fp*
local fp_ids fp_court fp_court_dow
local fp_dums : list fp_all - fp_ids

* Col 1 just repeats an estimation above

* Col 2 all the first pros dummies as instruments
ivreghdfe anyr_twoyears_arrest2 (ng_immed_all = `fp_dums') $covars,  cluster($clusters ) absorb($FE)  

* Col 3 all dummies LIML
ivreghdfe anyr_twoyears_arrest2 (ng_immed_all = `fp_dums') $covars,  cluster($clusters ) absorb($FE)   liml

* Col 4 UJIVE: manyIV package will give you Cols (2) (3) and (4) though with only a single cluster (first_pros) - but faster than above!
manyiv anyr_twoyears_arrest2 (ng_immed_all = `fp_dums')  $covars, cluster(first_pros ) absorb($FE) skipsingletons

* Col 5 interacted UJIVE
manyiv anyr_twoyears_arrest2 (ng_immed_all = .)  $covars, cluster(first_pros ) absorb($FE) skipsingletons absorbiv(fp_court_dow) forcejive

* Col 6 lasso ("plug-in" penalty using ivlasso defaults)
xtset court_month2  /* can use FE to partial out some of the FE, the rest by hand */
* FIX: cluster() referred to $cluster, which is never defined; the clustering
* global used throughout this file is $clusters.
* NOTE(review): confirm the installed ivlasso accepts both variables in cluster().
ivlasso anyr_twoyears_arrest2 $covars cd* (ng_immed_all=`fp_dums'),  cluster($clusters) fe first idstats post(plasso)

*******************************************************************************
* Table B.1 Randomization
*******************************************************************************

* Standardise the instrument using its mean and sd from -summarize-
sum ADA_iv, det
gen ADA_iv_std=(ADA_iv-r(mean))/r(sd)

* Regress ng_immed_all, then the standardised instrument, on the covariates
* and jointly test the covariates after each regression
foreach lhs in ng_immed_all ADA_iv_std {
	reghdfe `lhs' $covars, vce(cluster $clusters) absorb($FE)
	testparm $covars
}

*******************************************************************************
* Table B.2 First Stage in Subsamples (Average monotonicity check)

*******************************************************************************

* One quoted if-condition per subsample
local  ifs `" "number_counts_correct<=1" "number_counts_correct>1" "number_misd_correct<=1" "number_misd_correct>1" "number_misd_high==0" " number_misd_high>0" "conviction_misd_oneyearpriorc ==0" "conviction_misd_oneyearpriorc ==1" "conviction_felony_oneyearpriorc==0" "conviction_felony_oneyearpriorc==1" "citizen==0"  "citizen==1"  "type_pettybs_plus==1" "type_mv_plus==1" "type_drug==1" "type_uncat==1" "victimless==1" "victimless==0" "black==1" "hispanic==1" "white==1" "male==1" "male==0" "age_at_filing2<=25" "age_at_filing2>25" "race_miss" "predbla_max" "predwhi_max" "predhis_max" "predother_max" "'

* Print the condition, then regress ng_immed_all on ADA_iv (no covariates)
* within that subsample
foreach cond of local ifs {
	di "`cond'"
	reghdfe ng_immed_all ADA_iv  if `cond', vce(cluster $clusters) absorb($FE)
} 

*******************************************************************************
* Table B.3 FLL Test
* requires testjfe  from Frandsen, Lefgren, Leslie
*******************************************************************************

* command doesn't take factor variables, so build indicator sets by hand
	qui tab first_event_ym, gen(ym)
	
	qui tab  code_crt_lctn, gen(ct)
	qui tab first_event_dow, gen(day)
	
* FIX: the prosecutor indicators were passed as the wildcard fp*, which also
* matches the group ids fp_court and fp_court_dow created for Table A.1.
* Build the list of fp-prefixed variables and remove those two ids.
	unab fp_all : fp*
	local fp_ids fp_court fp_court_dow
	local fp_dums : list fp_all - fp_ids

	* given that, do it WITHIN each court but with dow and ym FE
	
	* strict_mono is set to 1 for every case in a court whose test p-value
	* is at or below 0.10
	gen strict_mono=0

	foreach court in DOR ROX BMC WROX EBOS SBO CHE  BRI CHA    {
		di "`court'"
		count if code_crt_lctn=="`court'"
		testjfe anyr_twoyears_arrest2 ng_immed_all `fp_dums' if code_crt_lctn=="`court'", covariates($covars ym* day1-day7) fitweight(1) numknots(3)
		
		replace strict_mono=1 if r(pval)<=0.100 & code_crt_lctn=="`court'"

	}

*******************************************************************************
* Table B.4 ADA Leniency and Post-Arraignment Case Outcomes
*******************************************************************************
* Every column is read from the first-stage output of ivreghdfe (option
* first): the case outcome sits in the endogenous-variable slot with ADA_iv
* as instrument, and the dependent variable is only a placeholder.

* `endog' : name used in the endogenous slot, by column
* `shown' : variable whose mean is summarised for that column
*           (days_to_last in `endog' is written as days_to_last_event here)
local endog num_events days_to_last       bail_set conviction dcjis_match
local shown num_events days_to_last_event bail_set conviction dcjis_match

** Panel A Just prosecuted defendants (ng_immed_all==0)
* Col 1 Num Events, Col 2 Days to disp, Col 3 Bail set, Col 4 Conviction
forvalues c = 1/4 {
	local d : word `c' of `endog'
	local m : word `c' of `shown'

	* placeholder outcome: conviction, except in the conviction column,
	* where num_events is used
	local y conviction
	if "`d'"=="conviction" local y num_events

	ivreghdfe `y' (`d' = ADA_iv) $covars if ng_immed_all==0, cluster($clusters) absorb($FE) first
	sum `m' if ng_immed_all==0
}

** Panel B Full Sample
* Cols 1-4 as in Panel A, plus Col 5 DCJIS Match; placeholder outcome is
* ng_immed_all throughout
forvalues c = 1/5 {
	local d : word `c' of `endog'
	local m : word `c' of `shown'

	ivreghdfe ng_immed_all (`d' = ADA_iv) $covars, cluster($clusters) absorb($FE) first
	sum `m'
}
	
*******************************************************************************
* Table B.5 Main Results Different Time Horizons
*******************************************************************************

foreach length in oneyears threeyears fiveyears {

	qui use "$path/suffolk_est_`length'_demos.dta", clear

	* oneyear/oneyears issue: the one-year outcome is stored without the
	* trailing "s", so add a copy that matches the anyr_<length>_arrest2 pattern
	gen anyr_oneyears_arrest2=anyr_oneyear_arrest2

	local var "anyr_`length'_arrest2"

	* OLS without / with covariates
	reghdfe `var' ng_immed_all, vce(cluster $clusters) absorb($FE)

	reghdfe `var' ng_immed_all $covars, vce(cluster $clusters) absorb($FE)

	* IV without covariates, with Anderson-Rubin CI (arci_poly.do reads the
	* f_ / r_ results saved by ivreghdfe)
	ivreghdfe `var' (ng_immed_all = ADA_iv),  cluster($clusters ) absorb($FE)  savefirst savefprefix(f_) saverf saverfprefix(r_)
	qui do arci_poly.do
	matrix list ar_l
	matrix list ar_u

	* IV with covariates
	* FIX: the upper Anderson-Rubin bound (ar_u) was not listed here, unlike
	* every other call to arci_poly.do in this file.
	ivreghdfe `var' (ng_immed_all = ADA_iv) $covars,  cluster($clusters ) absorb($FE)  savefirst savefprefix(f_) saverf saverfprefix(r_)
	qui do arci_poly.do
	matrix list ar_l
	matrix list ar_u

	* reduced form
	reghdfe `var' ADA_iv $covars, vce(cluster $clusters) absorb($FE )

	* means (the identical -sum- that used to be issued twice is issued once)
	sum `var' if ng_immed_all==0 
	* FIX: dropped "if $sample". That global is not defined in this do-file
	* (an empty expansion makes "if ," a syntax error), and the regressions in
	* this loop, like the main-table ivregcomplier2 call, use the full file.
	ivregcomplier2, y(`var') d(ng_immed_all) x($covars ) z(ADA_iv) fe($FE) cluster($clusters) ztrim(1) name(__`var')
	sum __`var' if ______=="E[Y | D = 0, Complier]"

	* F-test for randomization
	qui reghdfe ADA_iv $covars , vce(cluster $clusters) absorb($FE)
	testparm $covars

	drop ______ __`var' 
}

*******************************************************************************
* Table B.6 Second Stage by Crime Type
*******************************************************************************

* FIX: the Table B.5 loop above ends with the five-year file in memory, so
* this table (and B.7 / B.8, which follow without loading data) was being
* estimated on that file although the outcomes are the ${length} ones.
* Reload the main estimation sample first.
use "$path/suffolk_est_${length}_demos.dta", clear

* any / number of re-arrests overall and by severity (suffixes m / f), then
* any re-arrest by offense category, each overall and with m / f suffixes
global outcomes anyr_${length}_arrest2 anyr_${length}_marrest2  anyr_${length}_farrest2 ///
numr_${length}_arrest2 numr_${length}_marrest2 numr_${length}_farrest2  ///
anyr_${length}_violent anyr_${length}_violentm anyr_${length}_violentf ///
anyr_${length}_mv anyr_${length}_mvm anyr_${length}_mvf ///
anyr_${length}_pettybs anyr_${length}_pettybsm anyr_${length}_pettybsf ///
anyr_${length}_drug anyr_${length}_drugm anyr_${length}_drugf ///
anyr_${length}_uncat anyr_${length}_uncatm anyr_${length}_uncatf


foreach var of varlist $outcomes  {

	di "`var'"
	ivreghdfe `var' (ng_immed_all = ADA_iv) $covars,  cluster($clusters ) absorb($FE)  savefirst savefprefix(f_) saverf saverfprefix(r_)
	* Calculate Anderson Rubin Confidence Intervals (requires that ivreghdfe using both savefirst and saverf with prefixes f_ and r_)
	qui do arci_poly.do
	matrix list ar_l
	matrix list ar_u
	* outcome mean when ng_immed_all==0
	sum `var' if ng_immed_all==0 

}

*******************************************************************************
* Table B.7 Heterogeneous Effects by Demographic Group
*******************************************************************************

* Subsample indicators
* sex
gen ss1=male==1
gen ss2=male==0
* age groups (the age1 group additionally requires a non-missing age above 18)
gen ss3=(age1==1 & age_at_filing2>18 & !mi(age_at_filing2))
gen ss4=age2==1
gen ss5=age3==1
gen ss6=age4==1
* race/ethnicity from the pred*_max variables
gen ss7=predwhi_max==1
gen ss8=predbla_max==1
gen ss9=predhis_max==1
* race/ethnicity from the white / black / hisp variables
gen ss10=white==1
gen ss11=black==1
gen ss12=hisp==1

* ivreghdfe doesn't work when collinear things are dropped, so each subsample
* gets its own covariate set that leaves out the covariates defining it.
local base  "number_counts_correct number_misd_correct number_misd_high anyconv_misd_oneyearpriorc  anyconv_felony_oneyearpriorc citizen"
local ages  "age2 age3 age4"
local race  "predwhi predbla predhis"
local types "type_pettybs_only type_mv_only type_drug_only"

* ss1-ss2 (sex): male omitted
forvalues i = 1/2 {
	global covars`i' "`base' `ages' `race' `types'"
}
* ss3-ss6 (age): age indicators omitted
forvalues i = 3/6 {
	global covars`i' "`base' male `race' `types'"
}
* ss7-ss12 (race/ethnicity): predwhi predbla predhis omitted
forvalues i = 7/12 {
	global covars`i' "`base' male `ages' `types'"
}

* OLS and IV within each subsample
forvalues i = 1/12 {
	di "`i'"
	reghdfe anyr_${length}_arrest2 ng_immed_all ${covars`i'} if ss`i'==1, vce(cluster $clusters) absorb($FE)
	ivreghdfe anyr_${length}_arrest2 (ng_immed_all = ADA_iv) ${covars`i'} if ss`i'==1, cluster($clusters) absorb($FE)
}
*******************************************************************************
* Table B.8 Second Stage by Crime Type
*******************************************************************************

* NOTE(review): the banner for this table reads "Second Stage by Crime Type",
* but the specifications below vary the bail instrument - confirm the title.

* Col 1: identical to the main estimate, not re-run here

* Col 2: main IV specification with bail_iv added as a control
ivreghdfe anyr_${length}_arrest2 (ng_immed_all=ADA_iv) $covars bail_iv, ///
	cluster($clusters) absorb($FE )

* Col 3: nobail instrumented by bail_iv
ivreghdfe anyr_${length}_arrest2 (nobail=bail_iv) $covars, ///
	cluster($clusters) absorb($FE )

* Col 4: both endogenous variables, both instruments
ivreghdfe anyr_${length}_arrest2 (ng_immed_all nobail=ADA_iv bail_iv)  $covars, ///
	cluster($clusters) absorb($FE )

*******************************************************************************
* Table B.9 Different Time FE/Instruments
*******************************************************************************

* Panel A, Col 1: identical to the main estimate, not re-run here

* Panel A, Col 2: fixed effects taken from the FE_week global
ivreghdfe anyr_twoyears_arrest2 (ng_immed_all = ADA_iv) $covars, ///
	cluster($clusters ) absorb($FE_week) ///
	first savefirst savefprefix(f_) saverf saverfprefix(r_)

* arci_poly.do is run right after the fit; the matrices ar_l and ar_u are listed afterwards
quietly do arci_poly.do
matrix list ar_l
matrix list ar_u

* complier outcome mean; ivregcomplier2 output lands in variables prefixed with __
ivregcomplier2, y(anyr_twoyears_arrest2) d(ng_immed_all) x($covars ) z(ADA_iv) ///
	fe($FE_week) cluster($clusters) ztrim(1) name(__ADA_iv)
summarize __ADA_iv if ______=="E[Y | D = 0, Complier]"
drop __*


* Panel B uses different IV definitions
*
* For each alternative instrument: IV estimate with the main fixed effects (FE global),
* the arci_poly.do step (matrices ar_l / ar_u are listed afterwards), and the
* complier outcome mean from ivregcomplier2.
foreach iv in ADA_iv_noresid ADA_iv_shrink ADA_iv_exper ADA_iv_victimless ADA_iv_crime {
	ivreghdfe anyr_${length}_arrest2 (ng_immed_all = `iv') $covars,  cluster($clusters ) absorb($FE)  first savefirst savefprefix(f_) saverf saverfprefix(r_)
	qui do arci_poly.do
	matrix list ar_l
	matrix list ar_u
	* The complier mean must use the same outcome and fixed effects as the
	* regression above: FE, not the court x week set used only in Panel A Col 2.
	ivregcomplier2, y(anyr_${length}_arrest2) d(ng_immed_all) x($covars ) z(`iv') fe($FE) cluster($clusters) ztrim(1) name(__ADA_iv)
	sum __ADA_iv if ______=="E[Y | D = 0, Complier]"
	* clear the __-prefixed output so the next instrument starts clean
	drop __*
}

*******************************************************************************
* Table B.10 Sample Share by Compliance Type
*******************************************************************************

* Percentiles of ADA_iv, stored as constant variables so later expressions can reference them
_pctile ADA_iv, nq(100)
return list r99 r1 r2 r98
foreach q in 1 99 2 98 {
	generate p`q' = r(r`q')
}
* midpoints between the 1st/2nd and the 98th/99th percentiles
generate p1_5  = (p1 + p2)/2
generate p99_5 = (p99 + p98)/2

** linear (based only on the coefficients from the first stage)
reghdfe ng_immed_all ADA_iv $covars, vce(cluster $clusters) absorb($FE)

* Each spec is "suffix  low-percentile  high-percentile".
* always  = fitted value at the low percentile
* never   = 1 - fitted value at the high percentile
* complier = fitted value at high minus fitted value at low
foreach spec in "1 p1 p99" "15 p1_5 p99_5" "2 p2 p98" {
	local tag : word 1 of `spec'
	local lo  : word 2 of `spec'
	local hi  : word 3 of `spec'
	generate frac_complier_`tag' = (_b[_cons] + _b[ADA_iv]*`hi') - (_b[_cons] + _b[ADA_iv]*`lo')
	generate frac_always_`tag' = _b[_cons] + _b[ADA_iv]*`lo'
	generate frac_never_`tag' = 1 - (_b[_cons] + _b[ADA_iv]*`hi')
}

**local linear (same as the one in the picture)

* first stage with controls; keep the residuals
reghdfe ng_immed_all ADA_iv  $covars, absorb($FE) resid
predict resid_ADA_iv, resid

*cap drop resid_ADA
* add the instrument component and the constant back onto the residual
generate resid_ADA = resid_ADA_iv + _b[ADA_iv]*ADA_iv + _b[_cons]

* local linear fit on a 200-point grid (fs_x = grid point, fs_y = smoothed value)
lpoly resid_ADA ADA_iv, nograph degree(1) bw(0.04) gen(fs_x fs_y) n(200)

* By its very nature the step below is a proxy: it takes the smoothed value at the
* grid point just below each percentile. Averaging the two neighbouring grid
* values might be more exact, but at 2 digit precision it will not matter.
foreach q in 99 1 99_5 1_5 98 2 {
	generate tempb`q' = fs_y if fs_x[_n+1]>p`q' & fs_x[_n]<p`q'
}

* spread each bracket value to every observation
foreach q in 99 1 99_5 1_5 98 2 {
	egen b`q' = max(tempb`q')
}

* Shares from the local linear fit. Each spec is "suffix  low-value  high-value".
foreach spec in "1 b1 b99" "15 b1_5 b99_5" "2 b2 b98" {
	local tag : word 1 of `spec'
	local lo  : word 2 of `spec'
	local hi  : word 3 of `spec'
	generate frac_complier_`tag'_nl = `hi' - `lo'
	generate frac_always_`tag'_nl = `lo'
	generate frac_never_`tag'_nl = 1 - `hi'
}

* report linear shares first, then the local linear ones
foreach sfx in 1 15 2 1_nl 15_nl 2_nl {
	summarize frac_*`sfx'
}


*******************************************************************************
* Table B.11 Characteristics of Marginal Defendants 
*******************************************************************************

* One entry per subgroup condition
local  ifs `" "number_counts_correct<=1" "number_counts_correct>1" "number_misd_correct<=1" "number_misd_correct>1" "number_misd_high==0" " number_misd_high>0" "conviction_misd_oneyearpriorc ==0" "conviction_misd_oneyearpriorc ==1" "conviction_felony_oneyearpriorc==0" "conviction_felony_oneyearpriorc==1" "citizen==0"  "citizen==1"  "type_pettybs_plus==1" "type_mv_plus==1" "type_drug==1" "type_uncat==1" "age1==1" "age2==1" "age3==1" "age4==1"  "male==1" "male==0" "predbla_max==1" "predwhi_max==1" "predhis_max==1"  black==1 hisp==1 white==1 "'

* overall complier share (mean of frac_complier_1)
summarize frac_complier_1
local cshare = r(mean)

local k = 1
foreach cond of local ifs {
	* first stage within the subgroup and the implied complier share (p1 to p99)
	quietly reghdfe ng_immed_all ADA_iv $covars if `cond', cluster($clusters) absorb($FE )
	local cshare_sub = (_b[_cons] + _b[ADA_iv]*p99) - (_b[_cons] + _b[ADA_iv]*p1)

	* subgroup share of all observations in memory
	quietly count
	local Nall = r(N)
	quietly count if `cond'
	local pshare = r(N)/`Nall'

	* pshare: subgroup share; cshare: subgroup complier share scaled by pshare/overall; clike: their ratio
	generate pshare_`k' = `pshare'
	generate cshare_`k' = `cshare_sub'*`pshare'/`cshare'
	generate clike_`k' = cshare_`k'/`pshare'

	display "`cond'"
	foreach stat in pshare cshare clike {
		summarize `stat'_`k'
	}
	local ++k
}

*******************************************************************************
* Table B.12 Reweighted OLS
*******************************************************************************


* DECILE WEIGHTS

* Linear prediction (xb) of ng_immed_all from the covariates
reghdfe ng_immed_all $covars, absorb($FE)
predict p_np, xb

* Bin p_np into deciles using the cutpoints returned by _pctile (r(r1)-r(r9))
_pctile p_np, nq(10)
gen p_decile = 1 if p_np<=r(r1)
forvalues d = 2/9 {
	local lo = `d' - 1
	replace p_decile = `d' if p_np>r(r`lo') & p_np<=r(r`d')
}
* Stata treats missing as larger than any number, so the top bin must exclude
* missing p_np explicitly (the quartile binning does the same).
replace p_decile = 10 if p_np>r(r9) & !mi(p_np)

* one indicator per decile: r_decile1-r_decile10
tab p_decile, gen(r_decile)

* Weight for each decile cell = complier share within the cell / share of sample in the cell
generate complier_weight_10 = .
foreach cell of varlist r_decile1-r_decile10 {
	* first stage without controls inside the cell
	reghdfe ng_immed_all ADA_iv if `cell'==1, absorb($FE)
	generate frac_complier_`cell' = (_b[_cons] + _b[ADA_iv]*p99) - (_b[_cons] + _b[ADA_iv]*p1)  if `cell'==1
	egen frac_`cell' = mean(`cell')
	* fill only observations that have not been assigned a weight yet
	replace complier_weight_10 = frac_complier_`cell'/frac_`cell' if `cell'==1 & complier_weight_10==.
}

* QUARTILE x PREV CHARGE WEIGHTS
generate hasprev = def_sequence2>1

* quartiles of p_np from the _pctile cutpoints
_pctile p_np, nq(4)
generate p_quartile = 1 if p_np<=r(r1)
replace p_quartile = 2 if p_np>r(r1) & p_np<=r(r2)
replace p_quartile = 3 if p_np>r(r2) & p_np<=r(r3)
replace p_quartile = 4 if p_np>r(r3) & !mi(p_np)

* eight cells: quartile crossed with hasprev
forvalues q = 1/4 {
	generate r`q'_prior = p_quartile==`q' & hasprev==1
}
forvalues q = 1/4 {
	generate r`q'_noprior = p_quartile==`q' & hasprev==0
}

* weight = complier share within the cell / share of sample in the cell
generate complier_weight = .
foreach cell of varlist r1_prior r2_prior r3_prior r4_prior r1_noprior r2_noprior r3_noprior r4_noprior {
	reghdfe ng_immed_all ADA_iv if `cell'==1, absorb($FE)
	generate frac_complier_`cell' = (_b[_cons] + _b[ADA_iv]*p99) - (_b[_cons] + _b[ADA_iv]*p1)  if `cell'==1
	egen frac_`cell' = mean(`cell')
	replace complier_weight = frac_complier_`cell'/frac_`cell' if `cell'==1 & complier_weight==.
}


* Col 1: identical to the main estimate, not re-run here

* Col 2: Decile Weights
reghdfe anyr_twoyears_arrest2 ng_immed_all $covars  [w=complier_weight_10], ///
	vce(cluster $clusters) absorb($FE)

* Col 3: Quartile x Prev Charge Weights
reghdfe anyr_twoyears_arrest2 ng_immed_all $covars [w=complier_weight], ///
	vce(cluster $clusters) absorb($FE)

*******************************************************************************
* Table C.1 Missing Race Sum Stats
*******************************************************************************

* For ng_immed_all==1 and then ==0: summary stats where race_miss==1,
* the two counts, then summary stats where race_miss==0.
local outcomes "anyr_twoyears_arrest2 anyr_twoyears_pros anyr_twoyears_dcjis"
foreach d in 1 0 {
	summarize `outcomes' $covars if ng_immed_all==`d' & race_miss==1
	count if ng_immed_all==`d' & race_miss==1
	count if ng_immed_all==`d'
	summarize `outcomes' $covars if ng_immed_all==`d' & race_miss==0
}
	
*******************************************************************************
* Table C.2 Predicted Race vs Admin Race Data
*******************************************************************************

* race_other: all three recorded race indicators are zero and none is missing
generate race_other = (black==0 & hispanic==0 & white==0)  & !mi(black) & !mi(white) & !mi(hispanic)
* sum of predoth and predasi
generate predothasi = (predoth+predasi)

* predicted-race variables: full sample first, then within each recorded group
local preds "predbla predhis predwhi predothasi"
summarize `preds'
foreach grp in black hisp white race_other race_miss {
	summarize `preds' if `grp'==1
}



*******************************************************************************
* Table C.3 Missing ADA at arraignment Sum Stats 
*******************************************************************************

* Need the version of the data that still includes cases with a missing first
* prosecutor (file suffix inclmissingfp).
use "$path/suffolk_est_${length}_demos_inclmissingfp.dta", clear

* For ng_immed_all==1 and then ==0: summary stats where mifp==1, the two counts,
* then summary stats where mifp==0.
* Every cell applies the sample2_np restriction, so the summary statistics
* describe the same observations as the counts reported next to them.
* NOTE(review): sample2_np is defined outside this section - confirm it is set
* before this point.
local outcomes "anyr_twoyears_arrest2 anyr_twoyears_pros anyr_twoyears_dcjis"
foreach d in 1 0 {
	summarize `outcomes' $covars if ng_immed_all==`d' & $sample2_np & mifp==1
	count if ng_immed_all==`d' & $sample2_np & mifp==1
	count if ng_immed_all==`d' & $sample2_np
	summarize `outcomes' $covars if ng_immed_all==`d' & $sample2_np & mifp==0
}

*******************************************************************************
* Table C.4 Regressions for missing race or missing ada
*******************************************************************************


* Covariate set without the demographic controls.
* It has to be defined BEFORE Cols 1-3: an undefined global expands to nothing,
* so the regressions would otherwise run silently without any of these controls.
global covars_nodemos " number_counts_correct number_misd_correct number_misd_high anyconv_misd_oneyearpriorc  anyconv_felony_oneyearpriorc citizen  type_pettybs_only type_mv_only type_drug_only"

* cols 1-3: outcome is race_miss
use "$path/suffolk_est_${length}_demos.dta", clear

* interaction of non-prosecution with the two-year outcome
gen recidXnonpros = ng_immed_all * anyr_twoyears_arrest2
label var recidXnonpros "Complaint 2 Years X Not Pros"

reghdfe  race_miss  $covars_nodemos , vce(cluster id_prsn_dfndnt) absorb($FE)
reghdfe  race_miss ng_immed_all $covars_nodemos  , vce(cluster id_prsn_dfndnt) absorb($FE)
reghdfe race_miss ng_immed_all anyr_twoyears_arrest2 recidXnonpros $covars_nodemos,  vce(cluster id_prsn_dfndnt) absorb($FE)


* need to use the version of the data that still includes cases with a missing
* first prosecutor for Cols 4-6
use "$path/suffolk_est_${length}_demos_inclmissingfp.dta", clear

* outcome for Cols 4-6: first prosecutor is missing
cap drop miss_ADA
gen miss_ADA= first_pros==.

gen recidXnonpros = ng_immed_all * anyr_twoyears_arrest2
label var recidXnonpros "Complaint 2 Years X Not Pros"

 *  Need to drop singletons from regression above, below
 * NOTE(review): timeFE and the variable singleton are defined outside this
 * section - confirm both exist at this point.
 cap drop insample2 singleton2
 gen insample2=(mi(first_pros))
 bys $timeFE insample2: gen singleton2=(_N==1)
 tab singleton2

* Cols 4-6
reghdfe  miss_ADA  $covars_nodemos if (n_pros>=30 | mi(n_pros)) & singleton2!=1 & singleton!=1, vce(cluster id_prsn_dfndnt) absorb($FE)
reghdfe  miss_ADA ng_immed_all $covars_nodemos  if  (n_pros>=30 | mi(n_pros)) & singleton2!=1 & singleton!=1, vce(cluster id_prsn_dfndnt) absorb($FE)
reghdfe  miss_ADA ng_immed_all anyr_twoyears_arrest2 recidXnonpros $covars_nodemos  if (n_pros>=30 | mi(n_pros)) & singleton2!=1 & singleton!=1,  vce(cluster id_prsn_dfndnt) absorb($FE)


*******************************************************************************
* Table C.5 Imputation samples proportion missing ADA at arraignment
*******************************************************************************
use "$path/suffolk_est_twoyears_imputation.dta", clear

* treat the unimputed first_pros as version 0 so one loop covers all five versions
generate first_pros_imp0 = first_pros
forvalues k = 0/4 {
	* missing indicator overall, among ng_immed_all==1 (_np) and among ng_immed_all==0 (_p)
	generate mifp`k' = mi(first_pros_imp`k')
	generate mifp`k'_np = mi(first_pros_imp`k') if ng_immed_all==1
	generate mifp`k'_p = mi(first_pros_imp`k') if ng_immed_all==0
}

label variable mifp0 "Main Sample"
forvalues k = 1/4 {
	label variable mifp`k' "Imputation `k'"
}

* share missing: everyone, then ng_immed_all==0, then ng_immed_all==1
summarize mifp0 mifp1 mifp2 mifp3 mifp4
summarize mifp0_p mifp1_p mifp2_p mifp3_p mifp4_p
summarize mifp0_np mifp1_np mifp2_np mifp3_np mifp4_np
************************************************************************
************************************************************************
************************************************************************
					/* Appendix Figures */
************************************************************************
************************************************************************
************************************************************************


use "$path/suffolk_est_${length}_demos.dta", clear

*******************************************************************************
* Figure B.1 Shrinkage Version
*******************************************************************************

* 1st and 99th percentiles of each instrument; the plot uses these locals to trim the fitted lines
summarize ADA_iv, detail
local bottom = r(p1)
local top = r(p99)

summarize ADA_iv_shrink, detail
local bottom_shrink = r(p1)
local top_shrink = r(p99)

* --- shrunk instrument: residualize, add the instrument component back, smooth ---
areg ng_immed_all ADA_iv_shrink i.court_dow2, absorb(court_month2)
capture drop resid_ADA*
predict resid_ADA_iv_shrink, resid
generate resid_ADA_shrink = resid_ADA_iv_shrink + _b[ADA_iv_shrink]*ADA_iv_shrink + _b[_cons]

lpoly resid_ADA_shrink ADA_iv_shrink, nograph degree(1) bw(0.04) gen(fs_x_shrink fs_y_shrink) n(100) se(se_shrink)

* band: fitted value +/- 1.96 standard errors
generate upper_shrink = fs_y_shrink + 1.96*se_shrink
generate lower_shrink = fs_y_shrink - 1.96*se_shrink

* --- main instrument: same steps ---
areg ng_immed_all ADA_iv i.court_dow2, absorb(court_month2)
capture drop resid_ADA*
predict resid_ADA_iv, resid
generate resid_ADA = resid_ADA_iv + _b[ADA_iv]*ADA_iv + _b[_cons]

capture drop se  fs_x fs_y
lpoly resid_ADA ADA_iv, nograph degree(1) bw(0.04) gen(fs_x fs_y) n(100) se(se)

capture drop upper lower
generate upper = fs_y + 1.96*se
generate lower = fs_y - 1.96*se


* Figure B.1 plot.
* Axis 1: histograms (fraction scale, bin width .005) of ADA_iv (filled grey) and
*         ADA_iv_shrink (black outline), each restricted to absolute values <= .15.
* Axis 2: local linear fit fs_y with dashed upper/lower bands for the main
*         instrument (black/grey) and the shrunk instrument (red), each drawn only
*         between the bottom/top (or bottom_shrink/top_shrink) locals.
* The title, axis and legend options are attached to the last plot in the
* overlay; the legend keeps plots 1, 2, 3 and 6.
twoway hist ADA_iv if abs(ADA_iv)<=.15, width(.005) frac fcolor(gs10) lcolor(none) yaxis(1)  ///
    ||  hist ADA_iv_shrink if abs(ADA_iv_shrink)<=.15, width(.005) frac fcolor(none) lcolor(black)  ///
	|| line fs_y fs_x if fs_x>=`bottom' & fs_x<=`top', lc(black) lw(.6) yaxis(2) ///
	|| line upper fs_x if fs_x>=`bottom' & fs_x<=`top', lc(gs8) lw(.3) yaxis(2) lp(dash) ///
	|| line lower fs_x if fs_x>=`bottom' & fs_x<=`top', lc(gs8) lw(.3) yaxis(2) lp(dash) ///
	|| line fs_y_shrink fs_x_shrink if fs_x_shrink>=`bottom_shrink' & fs_x_shrink<=`top_shrink', lc(red) lw(.6) yaxis(2) ///
	|| line upper_shrink fs_x_shrink if fs_x_shrink>=`bottom_shrink' & fs_x_shrink<=`top_shrink', lc(red) lw(.3) yaxis(2) lp(dash) ///
	|| line lower_shrink fs_x_shrink if fs_x_shrink>=`bottom_shrink' & fs_x_shrink<=`top_shrink', lc(red) lw(.3) yaxis(2) lp(dash) ///
	title("", size(large) color(black))  ///
	ytitle("Fraction of Sample", size(medlarge) axis(1))  /// 
	ytitle("Residualized Rate of Nonprosecution", size(medlarge) axis(2)) ///
	xtitle("ADA Leniency", size(medium)) ///
	ylabel(0(.025).10 , nogrid axis(1)) ///
	ylabel(0.2(.05)0.35, nogrid axis(2)) ///
	xlabel(-0.15(.05)0.15 , nogrid) ///
	graphregion(color(white)) bgcolor(white) ///
	 legend(order(1 "Main" 2 "Shrunk" 3 "Main" 6 "Shrunk"))
	 
*******************************************************************************
* Figure B.2 Reduced Form Figure
*******************************************************************************
* 1st and 99th percentiles of ADA_iv; the plot uses these locals to trim the fitted line
summarize ADA_iv, detail
local bottom = r(p1)
local top = r(p99)

* RF
* reduced form: outcome on the instrument with i.court_dow2, absorbing court_month2
areg anyr_twoyears_arrest2 ADA_iv i.court_dow2, absorb(court_month2)
capture drop resid_ADA*
predict resid_ADA_iv, resid
* add the instrument component and the constant back onto the residual
generate resid_ADA = resid_ADA_iv + _b[ADA_iv]*ADA_iv + _b[_cons]

capture drop se  fs_x fs_y
lpoly resid_ADA ADA_iv, nograph degree(1) bw(0.04) gen(fs_x fs_y) n(100) se(se)

* band: fitted value +/- 1.64 standard errors
* NOTE(review): Figure B.1 uses 1.96 here - confirm the narrower band is intended.
capture drop upper lower
generate upper = fs_y + 1.64*se
generate lower = fs_y - 1.64*se
* Figure B.2 plot.
* Axis 1: histogram (fraction scale, bin width .005) of ADA_iv restricted to
*         absolute values <= .15.
* Axis 2: local linear fit fs_y with dashed upper/lower bands, drawn only
*         between the bottom and top locals.
* The title, axis and legend options are attached to the last plot in the overlay.
twoway hist ADA_iv if abs(ADA_iv)<=.15, width(.005) frac fcolor(gs10) lcolor(white) yaxis(1)  ///
	|| line fs_y fs_x if fs_x>=`bottom' & fs_x<=`top', lc(black) lw(.6) yaxis(2) ///
	|| line upper fs_x if fs_x>=`bottom' & fs_x<=`top', lc(gs8) lw(.3) yaxis(2) lp(dash) ///
	|| line lower fs_x if fs_x>=`bottom' & fs_x<=`top', lc(gs8) lw(.3) yaxis(2) lp(dash) ///
	title("", size(large) color(black))  ///
	ytitle("Fraction of Sample", size(medlarge) axis(1))  /// 
	ytitle("Residualized Rate of Complaint within 2 Years", size(medlarge) axis(2)) ///
	xtitle("ADA Leniency", size(medium)) ///
	legend(off) ///
	ylabel(0(.025).10 , nogrid axis(1)) ///
	ylabel(0.30(.05)0.37, nogrid axis(2)) ///
	xlabel(-0.15(.05)0.15 , nogrid) ///
	graphregion(color(white)) bgcolor(white) 
	
*******************************************************************************
* Figure B.3 MTE With Different Modeling Assumptions
*******************************************************************************

* Number of bootstrap replications passed to every mtefe call below
global bootreps 100

* All specifications share the outcome, covariates, instrument, trimsupport(0.01),
* clustering on id_prsn_dfndnt and the absorbed i.court_month2 i.court_dow2 terms;
* they differ only in the modeling options.

* normal (no link/poly/semiparametric option given)
mtefe anyr_twoyears_arrest2 $covars (ng_immed_all = ADA_iv), trimsupport(0.01) bootreps($bootreps ) vce(cluster id_prsn_dfndnt) absorb(i.court_month2 i.court_dow2) 
* poly 2
mtefe anyr_twoyears_arrest2 $covars (ng_immed_all = ADA_iv), link(logit) poly(2) trimsupport(0.01) bootreps($bootreps )  vce(cluster id_prsn_dfndnt) absorb(i.court_month2 i.court_dow2)
* poly 3
mtefe anyr_twoyears_arrest2 $covars (ng_immed_all = ADA_iv), link(logit) poly(3) trimsupport(0.01) bootreps($bootreps ) vce(cluster id_prsn_dfndnt) absorb(i.court_month2 i.court_dow2)
* poly 4 (order 4, so this no longer duplicates the poly 3 line above)
mtefe anyr_twoyears_arrest2 $covars (ng_immed_all = ADA_iv), link(logit) poly(4) trimsupport(0.01) bootreps($bootreps ) vce(cluster id_prsn_dfndnt) absorb(i.court_month2 i.court_dow2)
* semipar, 200 grid points
mtefe anyr_twoyears_arrest2 $covars (ng_immed_all = ADA_iv), trimsupport(0.01) bootreps($bootreps ) vce(cluster id_prsn_dfndnt) absorb(i.court_month2 i.court_dow2) semiparametric gridpoints(200)
* semipar, 200 grid points, separate
mtefe anyr_twoyears_arrest2 $covars (ng_immed_all = ADA_iv), trimsupport(0.01) bootreps($bootreps ) vce(cluster id_prsn_dfndnt) absorb(i.court_month2 i.court_dow2) semiparametric gridpoints(200) separate
* semipar, 200 grid points, poly
mtefe anyr_twoyears_arrest2 $covars (ng_immed_all = ADA_iv), trimsupport(0.01) bootreps($bootreps ) vce(cluster id_prsn_dfndnt) absorb(i.court_month2 i.court_dow2) semiparametric gridpoints(200) poly(3)


